
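# One-time setup: uncomment and run the two lines below to download the
# stopwords corpus before running this script for the first time.
# import nltk
# nltk.download('stopwords')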

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# --- Explore the NLTK stopwords corpus -------------------------------------

# Corpus README, flattened onto a single line for compact printing.
readme = ' '.join(stopwords.readme().split('\n'))
print(readme)

# Identifiers of the stopword lists shipped with the corpus.
fileids = stopwords.fileids()
print(fileids)

# English stopword list (pass another fileid, e.g. 'chinese', to switch lists).
words = stopwords.words('english')
print(words)

# Raw, unparsed contents of the Chinese stopword file, printed on one line.
raw = stopwords.raw('chinese')
print(' '.join(raw.split('\n')))


# --- Demo: split a sample sentence into stopwords and content words --------

# Sample text; the literal (including its irregular spacing) is kept exactly
# as-is so the tokenizer sees the same input as before.
input_str = (
    " Today' s weather is good, very windy and sunny, "
    "we have no classes in the afternoon, We have to play basketball tomorrow "
    "  good good"
)

# Tokenize, then lowercase so tokens compare equal to the lowercase stopwords.
tokens = [token.lower() for token in word_tokenize(input_str)]

# Distinct tokens that are also stopwords.
test_words = set(tokens)
res = test_words.intersection(words)
print(res)

# Build the lookup set once: membership tests against the original list would
# rescan the whole stopword list for every token.
stop_set = set(words)

# Tokens that survive stopword removal, in their original order (duplicates
# kept). Named `filtered_tokens` rather than `filter` so the builtin
# `filter()` is not shadowed at module level.
filtered_tokens = [token for token in tokens if token not in stop_set]
print(filtered_tokens)